### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### 
### this file reproduces Table A4 and Figure A5 in the Supplementary Appendix
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### 

library(tidyverse)
library(here)
library(haven)
library(fastDummies)
library(glmnet)
library(modelsummary)
library(ggpubr)
library(estimatr)
library(wesanderson)
library(Matrix)

# Start from a clean workspace: removes every object that ls() lists in the
# current environment. Attached packages are not affected by this call.
rm(list=ls())

#' Select covariates with a lasso fit
#'
#' Fits `glmnet(rhs, dv)` with its default settings, picks one penalty value
#' from the fitted lambda path, and returns the names of the predictors whose
#' coefficient at that penalty is non-zero.
#'
#' The penalty is chosen from the path itself (no cross-validation is run
#' here): among the lambdas that are at least `mean(lambda) - sd(lambda)`,
#' the one closest to `mean(lambda)` is used.
#'
#' @param rhs Predictor matrix, passed to `glmnet()` as its first argument.
#' @param dv Outcome, passed to `glmnet()` as its second argument.
#' @return Character vector of selected predictor names (possibly empty), in
#'   the row order of the coefficient matrix. The intercept is never returned.
get_nonzero_coefficients <- function(rhs, dv) {
  lasso <- glmnet(rhs, dv)

  path <- lasso$lambda
  eligible <- path[which(path >= mean(path) - sd(path))]
  chosen_lambda <- eligible[which.min(abs(eligible - mean(path)))]

  coefmat <- as.matrix(coef(lasso, s = chosen_lambda))
  # Read the single coefficient column by position so the result does not
  # depend on how that column happens to be labelled.
  estimates <- coefmat[, 1]

  # Keep every non-zero coefficient. Testing `> 0` would silently discard
  # covariates that the lasso kept with a negative sign.
  selected <- rownames(coefmat)[!is.na(estimates) & estimates != 0]

  # Drop the intercept by name. Removing "the first element" instead loses a
  # real covariate whenever the intercept is not among the selected rows.
  setdiff(selected, "(Intercept)")
}

# Load the cleaned survey data and derive two variables in one step:
#   imposs            1 when manipcheck2_4 equals the "impossible to know"
#                     answer, 0 for any other answer (a missing answer stays
#                     missing)
#   petition_ordinal  petition answers recoded Yes = 2, Maybe = 1, No = 0
d <- here("Data", "cleandata.rds") %>%
  readRDS() %>%
  mutate(
    imposs = if_else(
      manipcheck2_4 == "It is impossible to know the consequences", 1, 0
    ),
    petition_ordinal = recode(petition, "Yes" = 2, "Maybe" = 1, "No" = 0)
  )

### Assemble the variables used for the lasso selection and the models.
### Rows without the outcome (imposs) are dropped before anything else.
vars <- d %>%
  select(
    pool_concede, pool_concede_notstd, treat, info, certain, uncertain,
    control,
    debtpreA, debtpreB, debtpreC,
    partyid, age, Q9, inc, education,
    approval_biden, RWnews, urbrur, passcheck,
    wait_pool, blame_mult, signpet, ceiling_import, imposs
  ) %>%
  drop_na(imposs)

## Education is recoded in two passes: first the survey answers are collapsed
## into three groups, then the groups are turned into the codes 1-3.
vars <- vars %>%
  mutate(
    education = recode(
      education,
      "Associates or technical degree" = "some college",
      "Bachelor’s degree" = "college graduate",
      "Graduate or professional degree (MA, MS, MBA, PhD, JD, MD, DDS etc.)" = "college graduate",
      "High school diploma or GED" = "high school or less",
      "Prefer not to say" = "high school or less",
      "Some college, but no degree" = "some college",
      "Some high school or less" = "high school or less"
    ),
    education = recode(
      education,
      "high school or less" = 1,
      "some college" = 2,
      "college graduate" = 3
    )
  )

## Replace missing values of these covariates with the column median, then
## add an indicator for respondents whose Q9 answer is "Female".
vars <- vars %>%
  mutate(
    across(
      c(inc, urbrur, approval_biden),
      ~ ifelse(is.na(.x), median(.x, na.rm = TRUE), .x)
    )
  ) %>%
  mutate(female_dummy = ifelse(Q9 == "Female", 1, 0))

## Candidate covariates offered to the lasso.
rhs <- select(
  vars,
  debtpreA, debtpreB, debtpreC, partyid, age, female_dummy, inc, education,
  approval_biden, RWnews, urbrur, passcheck
)

## Add indicator columns for each categorical covariate. The variables are
## processed one at a time, in this order, so the new columns are appended in
## the same sequence.
rhs <- purrr::reduce(
  c("partyid", "inc", "education", "RWnews", "approval_biden", "urbrur"),
  fastDummies::dummy_cols,
  .init = rhs
)

## Keep a copy that still contains the original categorical columns, together
## with the outcome and the treatment indicators.
sumvars <- cbind(
  as.data.frame(rhs),
  select(vars, imposs),
  select(vars, certain, uncertain, info)
)

## The lasso only gets the indicator columns, not the variables they encode.
rhs <- select(
  rhs,
  -c(partyid, inc, education, RWnews, approval_biden, urbrur)
)

## Convert to a model matrix (makeX is asked to impute missing values), run
## the selection, and attach the matrix columns to `vars` so the selected
## names can be used in the regressions.
rhs <- makeX(rhs, na.impute = TRUE)
dv <- as.matrix(vars$imposs)
row_names_vector <- get_nonzero_coefficients(rhs, dv)
vars <- cbind(vars, rhs)


# Terms contributed by the lasso selection. Names are backquoted so that a
# selected column whose name is not a syntactic R name still parses. The
# explicit empty case matters: pasting onto a zero-length vector would
# otherwise produce a bogus "``" term.
covariate_terms <- if (length(row_names_vector) > 0) {
  paste0("`", row_names_vector, "`")
} else {
  character(0)
}

# Comparison of Certain and Uncertain vs. No Info
# Collapsing the full term list keeps the formula valid even when the lasso
# selected no covariates (no dangling "+").
formula <- as.formula(
  paste(
    "imposs ~",
    paste(c("certain", "uncertain", covariate_terms), collapse = " + ")
  )
)
lm_certain <- lm_robust(formula, data = vars)
modelsummary(lm_certain, stars = TRUE)

# Comparison of Certain vs. Uncertain
# `uncertain` is left out of this formula, so it is presumably the
# comparison group here.
formula <- as.formula(
  paste(
    "imposs ~",
    paste(c("certain", "control", covariate_terms), collapse = " + ")
  )
)
lm_certain2 <- lm_robust(formula, data = vars)
modelsummary(lm_certain2, stars = TRUE)

models <- list(
  "Certain & Uncertain vs. Control" = lm_certain,
  "Certain vs. Uncertain" = lm_certain2
)

# NOTE(review): `names` is defined but not passed to any call in this section.
names <- c("info" = "Information", "certain" = "Certain", "uncertain" = "Uncertain", "control" = "Control")
#          "Debt General" = "debtpreA", "Debt Taxes" = "debtpreC",

# Table note listing the covariates chosen by the lasso.
notelist <- paste(row_names_vector, collapse = ", ")
note <- paste0("Non-zero covariates include: ", notelist)

# LaTeX version of the table, followed by the default rendering.
modelsummary(models, stars = TRUE, notes = note, output = "latex")

modelsummary(models, stars = TRUE, notes = note)

# Coefficient plot for the two models. `coef_map` restricts the plot to the
# listed terms and supplies their display labels.
cmap <- c("certain" = "Certain", "uncertain" = "Uncertain", "info" = "Information")
coefplot <- modelplot(models, coef_map = cmap) + labs(x = " ")

# NOTE(review): `size =` in element_line() and a numeric `legend.position`
# are deprecated in recent ggplot2 releases; they are left unchanged here
# because the installed version is not known.
coefplot2 <- coefplot +
  scale_color_manual(values = c("#899DA4", "#C93312", "black", "#DC863B")) +
  theme(
    panel.background = element_rect(fill = "grey95"),
    panel.grid.major = element_line(color = "white", size = 0.5),
    panel.grid.minor = element_line(color = "white", size = 0.25)
  ) +
  # Title spelling corrected ("Impossible").
  ggtitle("Outcome: Impossible To Know Consequences") +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = c(.85, .20),
    # White legend box with a black border
    legend.background = element_rect(fill = "white", colour = "black"),
    legend.key = element_rect(fill = "white", colour = "white")
  ) +
  # Dashed reference line at zero
  geom_vline(xintercept = 0, linetype = "dashed", color = "black")

coefplot2

#### #### #### #### #### #### #### #### #### #### #### #### 
#### GPT4 Analysis: 
#### #### #### #### #### #### #### #### #### #### #### #### 

# Here we show the construction of the table. Below we show an example of the
# prompt and API call to create the data


# Clear the objects left over from the previous section.
rm(list = ls())

# GPT-coded certainty scores; one file per rating scale.
ten_scale <- readRDS(here("Data", "certaintycheck1.rds"))
five_scale <- readRDS(here("Data", "certaintycheck2.rds"))
hundo_scale <- readRDS(here("Data", "certaintycheck3.rds"))

# Same specification on each scale. The data argument is spelled out in full
# rather than relying on `d` partially matching `data`.
man1 <- lm_robust(new_numeric_column ~ certain + control, data = ten_scale)
man2 <- lm_robust(new_numeric_column ~ certain + control, data = five_scale)
man3 <- lm_robust(new_numeric_column ~ certain + control, data = hundo_scale)

models <- list(
  "5 Point Scale" = man2,
  "11 Point Scale" = man1,
  "101 Point Scale" = man3
)

# Default rendering, followed by the LaTeX version of the table.
modelsummary(models, stars = TRUE)

modelsummary(models, stars = TRUE, output = "latex")

### ### ### ### ### ### ### ### ### 
### Example of API Calls ### ### ### 
### ### ### ### ### ### ### ### ### 

library(tidyverse)
library(readxl)
library(here)
library(openai)
library(httr)
library(jsonlite)
library(ggplot2)
# Clear the objects created by the earlier sections before running the API
# example; attached packages are not affected.
rm(list = ls())


#' Crude token estimate for a piece of text
#'
#' Splits the first element of `text` on single spaces and counts one token
#' per resulting piece plus one per separating space.
#'
#' @param text Character vector; only the first element is examined.
#' @return `2 * pieces - 1`, or 0 when the text yields no pieces (for example
#'   the empty string).
calculate_tokens <- function(text) {
  words <- strsplit(text, " ")[[1]]
  # Floor at zero: without it an empty string would give -1.
  max(0, 2 * length(words) - 1)
}

### Provide the API key through the OPENAI_API_KEY environment variable.
### Replace the placeholder with your own key. https://openai.com/blog/openai-api
Sys.setenv(OPENAI_API_KEY = "YOURAPIKEY")

## Load the survey data and keep only the rows that have an open-ended
## response (a non-missing `manipulation`).
oedata <- here("Data", "cleandata.rds") %>%
  readRDS() %>%
  filter(!is.na(manipulation))

## Output container: a data frame with two columns and a single placeholder
## row of missing values, to which the results are appended.
output <- setNames(
  data.frame(matrix(ncol = 2, nrow = 1)),
  c("responseGPT", "obs")
)

## First half of the prompt: the instructions that precede every response.
## The commented-out line is an alternative set of instructions.

#q <- "Before the deadline for reaching the Federal debt limit in May of 2023, I asked Americans the following question `In one or two sentences, what do you think will happen if the government DOES NOT increase the debt ceiling?' I want you to code the following answer on a scale of 0-10. If the respondent thinks there will be no conseuqences, code the answer 0. If the respondent think there will be catastrophic conseuqences, code the answer 10. Use your judgement to place answers along this scale if the responses fall in between these two extremes. Response with 'NA' if there is insufficient information or the response is nonsensical. Reply only with the number or NA. Here is the response: "
q <- "Before the deadline for reaching the Federal debt limit in May of 2023, I asked Americans the following question `In one or two sentences, what do you think will happen if the government DOES NOT increase the debt ceiling?' I want you to code the following answer on a scale of 0-10 to reflect their certainty over what will happen if the US does not raise the debt ceiling. If the repondent is very certain about the outcome, code it a 10. If the respondent is very uncertain over what will happen, code a 0. Use your judgement to code between 0-10 for respondeses that lie between these two extremes. Code a response with 'NA' if there is insufficient information or the response is nonsensical. Reply only with the number or NA. Here is the response: "
print(q)

## Row indices for the loop: here, every row of the data frame.
## seq_len() gives an empty sequence when there are no rows, whereas
## seq(1, 0) would count down through 1 and 0.
## While testing you could instead use a fixed range or a random sample.
seq <- seq_len(nrow(oedata))
#seq <- seq(1,10)
#seq <- sample(1:nrow(df2), 20, replace = TRUE)


## Retry settings, used when an API call fails (for example because a rate
## limit was reached).
maxRetries <- 10  # Maximum number of retries after the first attempt
retryDelay <- 1   # Initial delay in seconds; doubles after every failure

for (i in seq) {
  # Paste the instructions together with the response you want judged.
  query <- paste0(q, oedata$manipulation[i])
  # print(query) # print out to see your full prompt

  # Tailor the token cap to each query; 40 is added to allow for long
  # responses. Reduce it if the output is numerical.
  # NOTE(review): max_tokens appears to limit the generated reply rather than
  # the prompt, so a cap based on prompt length may be larger than needed.
  tokennum <- calculate_tokens(query) + 40

  retryCount <- 0
  success <- FALSE
  gpt_answer <- NA_character_

  while (!success && retryCount <= maxRetries) {
    # tryCatch() hands back either the model's answer or the error object.
    # The retry bookkeeping is done out here, in the loop's own scope: an
    # assignment made inside an error-handler function only changes a copy
    # local to that function, so the loop counter would never advance.
    attempt <- tryCatch(
      {
        response <- create_chat_completion(
          model = "gpt-4-0613", # set the openai model you want to use.
          temperature = 0, # 0 means most predictable and lowest creativity
          max_tokens = tokennum, # maximum tokens used in the call
          messages = list(
            list(
              "role" = "user",
              "content" = query
            )
          )
        )
        answer <- response$choices$message.content
        # Anything other than exactly one answer is treated as a failed call.
        if (length(answer) != 1) {
          stop("unexpected response shape from the API", call. = FALSE)
        }
        answer
      },
      error = function(e) e
    )

    if (inherits(attempt, "error")) {
      cat("An error occurred:", conditionMessage(attempt), "\n")
      if (retryCount < maxRetries) {
        delay <- retryDelay * 2^retryCount
        cat("Retrying in", delay, "seconds...\n")
        Sys.sleep(delay)
      } else {
        cat("Maximum number of retries reached. Skipping to the next iteration.\n")
      }
      retryCount <- retryCount + 1
    } else {
      gpt_answer <- attempt
      success <- TRUE
    }
  }

  # Record exactly one row per observation. A call that never succeeded is
  # stored as NA, so the output stays aligned row-for-row with `oedata`.
  new <- data.frame(responseGPT = gpt_answer, obs = i, stringsAsFactors = FALSE)
  output <- rbind(output, new) # bind to the existing output
  print(gpt_answer) # print response from GPT (NA if every attempt failed)
  print(i) # print observation number
  #save(output, file = "continuoussave_openended.rda") # optional: save the output after every run in case the program crashes.
}

# Extract the number from each GPT reply.
output$new_numeric_column <- parse_number(output$responseGPT)

# Drop the placeholder first row that `output` was created with, then put the
# treatment and respondent variables side by side with the GPT codes. The two
# pieces are combined by position, not by key.
output2 <- output[-1, ]
treats <- cbind(
  select(oedata, treat, manipulation, partyid, certain, uncertain, control, age),
  output2
)
#saveRDS(treats, "certaintycheck1.rds")

## To create the measures of uncertainty around the other variables we simply changed the scale we wanted to use. 





